FML 读取

读取时遇到了一点问题:页面上一条故事会被拆成多个片段。后来发现每条故事的最后一个片段都以 "FML" 结尾,可以据此把片段拼回完整的故事。

#! /usr/bin/python
# coding:cp936
# CopyRight 2011 Adou XD , All Rights Reserved

import os
import sys
import urllib
import urllib2
import re


class fml() :
    '''Interactive reader for stories published on fmylife.com.

    Typical use: call chose() once to pick a listing, then call read()
    repeatedly; every call downloads the next page of that listing and
    returns its stories as a list of strings.

    Instance state:
        page  -- number substituted into the URL by the next read()
        site  -- URL template currently selected from `sites`
        sortt -- sort key substituted into the template ('' for the home page)
    '''

    # URL templates, one per time period.  "%s" receives the sort key
    # (self.sortt) and "%d" the page number (self.page).
    sites = ("http://www.fmylife.com/%s?page=%d",    # home
             "http://www.fmylife.com/tops/%s/day?page=%d", # day
             "http://www.fmylife.com/tops/%s/week?page=%d", # week
             "http://www.fmylife.com/tops/%s/month?page=%d", # month
             "http://www.fmylife.com/tops/%s?page=%d" # all time
             )
    # Sort keys, in the same order as the menu printed by chosesort().
    sorts = ( 'top' ,
              'flop' ,
              'comment',
              'favorite'
            )

    def __init__(self):
        self.page = 0
        self.site = fml.sites[0]
        self.sortt = ''

    @staticmethod
    def _ask(count):
        '''Prompt with ">>" until the user enters a whole number in 1..count.

        Returns the zero-based index of the chosen menu entry.  The text is
        converted with int() rather than evaluated, so arbitrary expressions
        typed at the prompt are never executed; anything that is not a
        number in range simply causes another prompt.
        '''
        try:
            read_line = raw_input   # Python 2: returns the raw text
        except NameError:
            read_line = input       # Python 3: input() already returns text

        while True:
            try:
                num = int(read_line(">>")) - 1
            except ValueError:
                continue            # not a number: ask again
            if 0 <= num < count:
                return num

    @staticmethod
    def _stories(sdata):
        '''Extract the complete stories contained in the page text `sdata`.

        Every text found between 'class="fmllink">' and the next '</a>' is
        one fragment.  Consecutive fragments are concatenated until one of
        them contains "FML", which closes the story.  Trailing fragments
        that are never closed by "FML" are dropped.
        '''
        fragments = re.findall(r'(?<=class="fmllink">).*?(?=</a>)', sdata)

        msg = []
        pieces = []
        for fragment in fragments:
            pieces.append(fragment)
            if 'FML' in fragment:
                msg.append(''.join(pieces))
                pieces = []
        return msg

    def chose(self) :
        '''Ask the user for a time period and, unless it is the home page,
        for a sort order.  Updates self.site and self.sortt.
        '''
        print('Time Period:')
        print('---1-- Home')
        print('---2-- Top of the day')
        print('---3-- Top of the week')
        print('---4-- Top of the month')
        print('---5-- All time')

        num = self._ask(len(fml.sites))
        self.site = fml.sites[num]

        if num != 0 :
            self.chosesort()
        else :
            # The home page takes no sort key; clear any key left over from
            # an earlier selection so it does not end up in the home URL.
            self.sortt = ''

    def chosesort(self) :
        '''Ask the user for a sort order and store it in self.sortt.
        '''
        print('Sorted by :')
        print('---1-- Agreed')
        print('---2-- Deserved')
        print('---3-- Comment')
        print('---4-- Favorite')

        self.sortt = fml.sorts[self._ask(len(fml.sorts))]

    def read(self) :
        '''Download the current page and return its stories as a list.

        self.page is advanced only after the download succeeded, so a
        failed request can be retried without skipping a page.  Errors
        raised by urlopen() or by reading the response propagate to the
        caller.
        '''
        # Imported here so the method finds urlopen on Python 2 (urllib2)
        # as well as on Python 3 (urllib.request).
        try:
            from urllib2 import urlopen
        except ImportError:
            from urllib.request import urlopen

        url = self.site % (self.sortt, self.page)
        response = urlopen(url)
        try:
            sdata = response.read()
        finally:
            response.close()    # always release the connection
        self.page += 1

        if not isinstance(sdata, str):
            # Python 3 hands back bytes; on Python 2 the data is already str
            # and is left untouched.
            # NOTE(review): assumes the site serves UTF-8 -- confirm.
            sdata = sdata.decode('utf-8', 'replace')

        return self._stories(sdata)


if __name__ == "__main__":
    # Script entry point: let the user pick a listing, fetch one page and
    # print every story preceded by an empty line.
    reader = fml()
    reader.chose()

    for story in reader.read():
        print('')
        print(story)